This notebook is a work in progress.
Ultimately I aim to do the following with this notebook:
Wish list for things to add in the future:
So far, items 1, 2 and 4 are partially implemented.
Notes
Possible Improvements
TO DO
In [184]:
INPUT_FILE = r'Probabilistic Assessment' #target ipynb file, but specified with no extension.
target = '"' + INPUT_FILE + '.ipynb"'
#convert to html
!ipython nbconvert --to html $target
In [185]:
from bs4 import BeautifulSoup

# Parse the HTML produced by nbconvert into a tree that the following cells
# edit in place. The file is opened in a `with` block so the handle is closed
# as soon as parsing finishes instead of being left open.
with open(INPUT_FILE + r'.html') as html_file:
    soup = BeautifulSoup(html_file, 'html5lib')
In [186]:
# Dry run: print every comment span whose text is exactly "#hide input".
# Nothing is removed here; this only shows which markers are present in the
# rendered code cells.
code_cells = soup.find_all('div', 'cell border-box-sizing code_cell rendered')
for cell in code_cells:
    # spans with class "c" are the comment tokens of the highlighted source
    for span in cell.find_all('span', 'c'):
        if span.text != '#hide input':
            continue
        print(span)
In [187]:
# Empty the "In [n]:" / "Out[n]:" prompt boxes. The boxes themselves are kept
# (only their contents are cleared) because removing them disturbs the layout.
for prompt_class in ('prompt input_prompt', 'prompt output_prompt'):
    for prompt_box in soup.find_all('div', prompt_class):
        prompt_box.clear()

# Remove the anchor links (the bookmark links shown when hovering over headers).
for anchor in soup.find_all('a', 'anchor-link'):
    anchor.decompose()
In [188]:
# When True, every input block in the document is removed after the
# per-cell directives below have been applied.
DELETE_INPUTS = False

# Per-cell directives: a comment inside a code cell whose text is exactly one
# of the markers below changes how that cell appears in the output.
#   #hide input         -> remove the cell's input block(s)
#   #hide input/output  -> remove the whole cell
#   #hide output        -> remove the cell's output wrapper(s)
# Only the first directive found in a cell is applied; the scan of that cell
# stops immediately afterwards.
for cell in soup.find_all('div', 'cell border-box-sizing code_cell rendered'):
    for comment in cell.find_all('span', 'c'):
        directive = comment.text
        if directive == '#hide input':
            for input_block in cell.find_all('div', 'input'):
                input_block.decompose()
            break
        if directive == '#hide input/output':
            cell.decompose()
            break
        if directive == '#hide output':
            for output_block in cell.find_all('div', 'output_wrapper'):
                output_block.decompose()
            break

# Optional global switch: strip every input block from the document.
if DELETE_INPUTS:
    for input_block in soup.find_all('div', 'input'):
        input_block.decompose()
#@media print{*{text-shadow:none !important;color:#000 !important;background:transparent !important;box-shadow:none !important} a,a:visited{text-decoration:underline} a[href]:after{content:" (" attr(href) ")"} abbr[title]:after{content:" (" attr(title) ")"} .ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""} pre,blockquote{border:1px solid #999;page-break-inside:avoid} thead{display:table-header-group} tr,img{page-break-inside:avoid} img{max-width:100% !important} @page {margin:.5cm}p,h2,h3{orphans:3;widows:3} h2,h3{page-break-after:avoid}}body{margin:0;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:13px;line-height:20px;color:#000;background-color:#fff}
In [189]:
# nbconvert embeds its stylesheet in the HTML file. Patch the print rules in
# that embedded CSS with plain string replacement:
#   * drop the rule that forces all text to black when printed
#   * widen the page margins
#   * remove the border drawn around blockquotes when printed
for tag in soup.find_all('style', type='text/css'):
    # remove the change to monochrome text colours
    s = tag.text.replace('@media print{*{text-shadow:none !important;color:#000 !important;',
                         '@media print{*{text-shadow:none !important;')
    # increase page margins; the minified stylesheet writes the value as
    # ".5cm" (no leading zero), so accept both spellings
    for old_margin in ('@page {margin:.5cm}', '@page {margin:0.5cm}'):
        s = s.replace(old_margin, '@page {margin:1.5cm}')
    # remove the border from blockquotes in the printed output
    s = s.replace('blockquote{border:1px solid #999;page-break-inside:avoid}',
                  'blockquote{page-break-inside:avoid}')
    tag.string = s
In [190]:
# Point the page at the notebook-specific stylesheet: every
# <link href="custom.css" rel="stylesheet"/> is redirected to sea-style.css.
custom_links = soup.find_all('link', href='custom.css', rel='stylesheet')
for link in custom_links:
    link['href'] = 'sea-style.css'
In [191]:
# Scale every svg to 70% of its original height so it does not overlap the
# sides of the text.
from re import match, I

svg_scale = 0.7
for tag in soup.find_all('svg'):
    current_val = tag.get('height')
    if current_val is None:
        # no explicit height attribute: nothing to rescale
        continue
    # split the value into its number (integer or decimal) and its unit
    m = match(r"([0-9]+(?:\.[0-9]+)?)([a-z]+)", current_val, I)
    # test the match result, not the `match` function: values that do not
    # look like "<number><unit>" are left untouched
    if m:
        number, unit = m.groups()
        tag['height'] = str(svg_scale * float(number)) + unit
In [192]:
# Add numbering to level-2 and level-3 headings, in document order:
# an h2 becomes "N. title" and an h3 becomes "N.M. title", where N is the
# number of the enclosing h2 and M counts the h3s inside that h2.
headingcounter = {'h1': 0, 'h2': 0, 'h3': 0}
for tag in soup.find_all(['h2', 'h3']):
    if tag.name == 'h2':
        headingcounter['h2'] += 1
        # subsection numbering restarts under every new h2
        headingcounter['h3'] = 0
        prefix = str(headingcounter['h2']) + r'. '
    else:
        # only h2 and h3 are requested above, so this is an h3
        headingcounter['h3'] += 1
        prefix = str(headingcounter['h2']) + '.' + str(headingcounter['h3']) + '. '
    # Prepend the number as a new text node. Unlike rebuilding tag.string,
    # this also works for headings that contain nested markup (where
    # tag.string is None) and keeps that markup intact.
    tag.insert(0, prefix)
In [193]:
# Write the modified document back over the HTML file produced by nbconvert.
write_target = INPUT_FILE + r'.html'
# The `with` block guarantees the file is closed even if the write fails.
with open(write_target, "w") as text_file:
    # soup.prettify() is deliberately not used: it adds extra blank lines
    # around superscripts, which spoils the rendered output.
    text_file.write(soup.encode("UTF-8"))
In [194]:
#convert to pdf
#wkhtml seems to be struggling with the rendering of LaTeX. Seems to be because of poor compatibility with MathJax.
#pdf_target = '"'+ INPUT_FILE + r'.pdf"'
#!wkhtmltopdf '"' + $write_target + '" ' + $INPUT_FILE
In [195]:
#open the html to test
from platform import system
print system()
if system() == 'Windows':
open_string = r'"C:\Program Files\Google\Chrome\Application\chrome.exe" "' + write_target + '"'
elif system() == 'Darwin': #mac os x. needs checking
open_string = r'/usr/bin/open -a "/Applications/Google Chrome.app" "' + write_target + '"'
#open the new html file in chrome
#if you don't have chrome installed, or have it installed in a different place, it'll probably crack the shits
!$open_string